{
 "metadata": {
  "name": "Module 3 DESeq"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "#I am having issues installing the R-to-Python interface, so I will be doing everything through RStudio instead.\n#Using the gonad transcriptome from Mac, I ran an RNA-Seq analysis in CLC, following the instructions in Steven's video.\n#Quick rundown: import the HTP sequence data into CLC, then run RNA-Seq through the CLC Server on the two transcriptomes.\n#Set up an experiment comparing the two transcriptomes (gill, gonad), then export the labelled data to a CSV file.\n#The run took several days, but I was able to get a viable file set from it.\n#I exported the CSV file to Eagle, into my bioinformatics folder. From there I downloaded it locally to use in R.\n#In the meantime, I completed the trial script Steven provided in Module 3. I will be modifying the code in that script,\n#using RStudio to edit and run it.",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "cd C:\\Users\\Christine Savolainen\\Desktop\\Bio Informatics\\DESeq\\DESeq",
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": "C:\\Users\\Christine Savolainen\\Desktop\\Bio Informatics\\DESeq\\DESeq\n"
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "!head -2 GonadvGill.txt",
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": "head: cannot open `GonadvGill.txt' for reading: No such file or directory\n"
      }
     ],
     "prompt_number": 5
    },
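    {
     "cell_type": "code",
     "collapsed": false,
     "input": "#Converted the CSV file to a tab-delimited TXT file using the following R code:\n#govgi = read.csv(\"GonadvsGill.csv\")\n#write.table(govgi,\"govgi.txt\", sep=\"\\t\")\n#NOTE: this code writes govgi.txt, but the cells below read govsgi.txt -- confirm which file name is correct.",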
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "!head -2 govsgi.txt",
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": "\"Feature.ID\"\t\"Range..original.values.\"\t\"IQR..original.values.\"\t\"Difference..original.values.\"\t\"Fold.Change..original.values.\"\t\"Expression.values\"\t\"Gene.length\"\t\"Unique.gene.reads\"\t\"Total.gene.reads\"\t\"RPKM\"\t\"Means\"\t\"Expression.values.1\"\t\"Gene.length.1\"\t\"Unique.gene.reads.1\"\t\"Total.gene.reads.1\"\t\"RPKM.1\"\t\"Means.1\"\n\"1\"\t\"CGI_10000001\"\t\"190\"\t\"190\"\t\"190\"\t\"3.346\"\t\"81\"\t\"351\"\t\"0\"\t\"81\"\t\"12.668\"\t\"81\"\t\"271\"\t\"351\"\t\"0\"\t\"271\"\t\"17.384\"\t\"271\"\n"
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "!dos2unix < govsgi.txt > govsgiunix.txt",
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": "'dos2unix' is not recognized as an internal or external command,\noperable program or batch file.\n"
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "!sed 's/\\\"/ /g' -input govsgi.txt -output govsgi2.txt",
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": "sed: unknown option -- o\nUsage: sed [OPTION]... {script-only-if-no-other-script} [input-file]...\n\n  -n, --quiet, --silent\n                 suppress automatic printing of pattern space\n  -e script, --expression=script\n                 add the script to the commands to be executed\n  -f script-file, --file=script-file\n                 add the contents of script-file to the commands to be executed\n  --follow-symlinks\n                 follow symlinks when processing in place\n  -i[SUFFIX], --in-place[=SUFFIX]\n                 edit files in place (makes backup if SUFFIX supplied)\n  -b, --binary\n                 open files in binary mode (CR+LFs are not processed specially)\n  -l N, --line-length=N\n                 specify the desired line-wrap length for the `l' command\n  --posix\n                 disable all GNU extensions.\n  -r, --regexp-extended\n                 use extended regular expressions in the script.\n  -s, --separate\n                 consider files as separate rather than as a single continuous\n                 long stream.\n  -u, --unbuffered\n                 load minimal amounts of data from the input files and flush\n                 the output buffers more often\n  -z, --null-data\n                 separate lines by NUL characters\n      --help     display this help and exit\n      --version  output version information and exit\n\nIf no -e, --expression, -f, or --file option is given, then the first\nnon-option argument is taken as the sed script to interpret.  All\nremaining arguments are names of input files; if no input files are\nspecified, then the standard input is read.\n\nGNU sed home page: <http://www.gnu.org/software/sed/>.\nGeneral help using GNU software: <http://www.gnu.org/gethelp/>.\n"
      }
     ],
     "prompt_number": 9
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "!dir",
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": " Volume in drive C has no label.\n Volume Serial Number is 7C08-88ED\n\n Directory of C:\\Users\\Christine Savolainen\\Desktop\\Bio Informatics\\DESeq\\DESeq\n\n02/05/2014  04:31 PM    <DIR>          .\n02/05/2014  04:31 PM    <DIR>          ..\n01/25/2014  07:42 AM             6,148 .DS_Store\n01/25/2014  06:49 AM             2,824 .Rapp.history\n02/03/2014  06:44 PM         1,270,869 .RData\n02/03/2014  06:44 PM             1,419 .Rhistory\n02/04/2014  04:22 PM     1,565,843,336 BiGoRNA_GTGTCTAC_1 (paired)-1.zip\n02/03/2014  12:50 PM         1,270,869 DESeqTrialRun\n02/05/2014  01:11 PM         1,295,896 GonadvsGill.csv\n02/05/2014  04:30 PM         1,392,524 govgi.txt\n02/05/2014  04:14 PM         1,380,554 govgi3.txt\n02/05/2014  04:25 PM           202,401 govgi4.txt\n02/05/2014  04:27 PM           202,359 govgi5.txt\n02/05/2014  04:31 PM           202,398 govgi6.txt\n02/05/2014  04:31 PM           202,359 govgi7.txt\n02/05/2014  03:18 PM           961,606 govgi_SR.txt\n02/03/2014  12:49 PM            55,628 HistoDESeq.jpeg\n02/03/2014  12:48 PM         1,043,209 LT_DESeq.txt\n08/13/2012  12:30 PM           203,744 LT_UniqueCounts.txt\n08/06/2012  05:01 PM         1,001,372 MANUAL_DESeq.pdf\n02/03/2014  12:50 PM           108,086 PointPlotDESeq.jpeg\n02/03/2014  12:51 PM         1,270,869 RtrialRunDESeq.jpeg\n08/14/2012  12:34 PM             1,552 SCRIPT_DESeq_LT_no replication.R\n              21 File(s)  1,577,920,022 bytes\n               2 Dir(s)  136,759,033,856 bytes free\n"
      }
     ],
     "prompt_number": 87
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "!gawk -F '\\t' '{print $2,$9,$15}' < govsgi.txt > govsgi3.txt",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 17
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "!head -2 govsgi3.txt",
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": "\"Range..original.values.\" \"Total.gene.reads\" \"Total.gene.reads.1\"\n\"CGI_10000001\" \"0\" \"0\"\n"
      }
     ],
     "prompt_number": 18
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "!sed 's/\"Range..original.values.\" \"Total.gene.reads\" \"Total.gene.reads.1\"/Feature Go Gi/g' < govsgi3.txt > govsgi4.txt",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 19
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "!head -5 \"govsgi4.txt\"",
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": "Feature Go Gi\n\"CGI_10000001\" \"0\" \"0\"\n\"CGI_10000002\" \"44\" \"360\"\n\"CGI_10000003\" \"0\" \"24\"\n\"CGI_10000004\" \"25\" \"20\"\n"
      }
     ],
     "prompt_number": 20
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "from IPython.display import HTML\nHTML('<iframe src=http://eagle.fish.washington.edu/dermochelys/Bioinformatics/DESeqGovGiNotebook.html  width=700 height=350></iframe>')",
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": "<iframe src=http://eagle.fish.washington.edu/dermochelys/Bioinformatics/DESeqGovGiNotebook.html  width=700 height=350></iframe>",
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 103,
       "text": "<IPython.core.display.HTML at 0x542f7b8>"
      }
     ],
     "prompt_number": 103
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "#The R code cds <- newCountDataSet(CountData, Treatment) gives a different error message in RStudio:\n#Error in round(countData) : non-numeric argument to mathematical function",
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "!sed '/[[:digit:]]/{s/,//g}' < govsgi4.txt > govsgi5.txt\n",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 21
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "from IPython.display import HTML\nHTML('<iframe src=http://eagle.fish.washington.edu/dermochelys/Bioinformatics/DESeqGovGi2Notebook.html  width=700 height=350></iframe>')",
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": "<iframe src=http://eagle.fish.washington.edu/dermochelys/Bioinformatics/DESeqGovGi2Notebook.html  width=700 height=350></iframe>",
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 8,
       "text": "<IPython.core.display.HTML at 0x537cc50>"
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "from IPython.display import HTML\nHTML('<iframe src=http://eagle.fish.washington.edu/dermochelys/Bioinformatics/DESeqGovGi2.html  width=700 height=350></iframe>')",
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": "<iframe src=http://eagle.fish.washington.edu/dermochelys/Bioinformatics/DESeqGovGi2.html  width=700 height=350></iframe>",
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 16,
       "text": "<IPython.core.display.HTML at 0x5280d68>"
      }
     ],
     "prompt_number": 16
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "#Updated the output to include gene IDs; also added \"quote = FALSE\" to the write.table call to eliminate the quotes from the table.",
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}